"""
Centralized path validation utilities for security.

This module provides secure path validation to prevent path traversal attacks
and other filesystem-based security vulnerabilities.
"""

import os
import re
from pathlib import Path
from typing import Optional, Union

from loguru import logger
from werkzeug.security import safe_join


class PathValidator:
    """Centralized path validation for security."""

    # Regex for safe filename/path characters
    SAFE_PATH_PATTERN = re.compile(r"^[a-zA-Z0-9._/\-]+$")

    # Allowed config file extensions
    CONFIG_EXTENSIONS = (".json", ".yaml", ".yml", ".toml", ".ini", ".conf")

    @staticmethod
    def validate_safe_path(
        user_input: str,
        base_dir: Union[str, Path],
        allow_absolute: bool = False,
        required_extensions: Optional[tuple] = None,
    ) -> Optional[Path]:
        """
        Validate and sanitize a user-provided path.

        Args:
            user_input: The user-provided path string
            base_dir: The safe base directory to contain paths within
            allow_absolute: Whether to allow absolute paths (with restrictions)
            required_extensions: Tuple of required file extensions (e.g., ('.json', '.yaml'))

        Returns:
            Path object if valid, None if invalid

        Raises:
            ValueError: If the path is invalid or unsafe
        """
        if not user_input or not isinstance(user_input, str):
            raise ValueError("Invalid path input")

        # Strip whitespace
        user_input = user_input.strip()

        # Use werkzeug's safe_join for secure path joining
        # This handles path traversal attempts automatically
        base_dir = Path(base_dir).resolve()

        try:
            # safe_join returns None if the path tries to escape base_dir
            safe_path = safe_join(str(base_dir), user_input)

            if safe_path is None:
                logger.warning(f"Path traversal attempt blocked: {user_input}")
                raise ValueError("Invalid path - potential traversal attempt")

            result_path = Path(safe_path)

            # Check extensions if required
            if (
                required_extensions
                and result_path.suffix not in required_extensions
            ):
                raise ValueError(
                    f"Invalid file type. Allowed: {required_extensions}"
                )

            return result_path

        except Exception as e:
            logger.warning(
                f"Path validation failed for input '{user_input}': {e}"
            )
            raise ValueError(f"Invalid path: {e}") from e

    @staticmethod
    def validate_local_filesystem_path(
        user_path: str,
        restricted_dirs: Optional[list[Path]] = None,
    ) -> Path:
        """
        Validate a user-provided absolute filesystem path for local indexing.

        This is for features like local folder indexing where users need to
        access files anywhere on their own machine, but system directories
        should be blocked.

        Args:
            user_path: User-provided path string (absolute or with ~)
            restricted_dirs: List of restricted directories to block

        Returns:
            Validated and resolved Path object

        Raises:
            ValueError: If path is invalid or points to restricted location
        """
        import sys

        if not user_path or not isinstance(user_path, str):
            raise ValueError("Invalid path input")

        user_path = user_path.strip()

        # Basic sanitation: forbid null bytes and control characters
        if "\x00" in user_path:
            raise ValueError("Null bytes are not allowed in path")
        if any(ord(ch) < 32 for ch in user_path):
            raise ValueError("Control characters are not allowed in path")

        # Expand ~ to home directory
        if user_path.startswith("~"):
            home_dir = Path.home()
            relative_part = user_path[2:].lstrip("/")
            if relative_part:
                user_path = str(home_dir / relative_part)
            else:
                user_path = str(home_dir)

        # Disallow malformed Windows paths (e.g. "/C:/Windows")
        if (
            sys.platform == "win32"
            and user_path.startswith(("/", "\\"))
            and ":" in user_path
        ):
            raise ValueError("Malformed Windows path input")

        # Block path traversal patterns before resolving
        # This explicit check helps static analyzers understand the security intent
        if ".." in user_path:
            raise ValueError("Path traversal patterns not allowed")

        # Use safe_join to sanitize the path - this is recognized by static analyzers
        # For absolute paths, we validate against the root directory
        if user_path.startswith("/"):
            # Unix absolute path - use safe_join with root
            safe_path = safe_join("/", user_path.lstrip("/"))
            if safe_path is None:
                raise ValueError("Invalid path - failed security validation")
            validated_path = Path(safe_path).resolve()
        elif len(user_path) > 2 and user_path[1] == ":":
            # Windows absolute path (e.g., C:\Users\...)
            drive = user_path[:2]
            rest = user_path[2:].lstrip("\\").lstrip("/")
            safe_path = safe_join(drive + "\\", rest)
            if safe_path is None:
                raise ValueError("Invalid path - failed security validation")
            validated_path = Path(safe_path).resolve()
        else:
            # Relative path - resolve relative to current directory
            # Use safe_join to validate
            cwd = os.getcwd()
            safe_path = safe_join(cwd, user_path)
            if safe_path is None:
                raise ValueError("Invalid path - failed security validation")
            validated_path = Path(safe_path).resolve()

        # Default restricted directories
        if restricted_dirs is None:
            restricted_dirs = [
                Path("/etc"),
                Path("/sys"),
                Path("/proc"),
                Path("/dev"),
                Path("/root"),
                Path("/boot"),
                Path("/var/log"),
            ]
            # Add Windows system directories if on Windows
            if sys.platform == "win32":
                for drive in ["C:", "D:", "E:"]:
                    restricted_dirs.extend(
                        [
                            Path(f"{drive}\\Windows"),
                            Path(f"{drive}\\System32"),
                            Path(f"{drive}\\Program Files"),
                            Path(f"{drive}\\Program Files (x86)"),
                        ]
                    )

        # Check against restricted directories
        for restricted in restricted_dirs:
            if validated_path.is_relative_to(restricted):
                logger.error(
                    f"Security: Blocked access to restricted directory: {validated_path}"
                )
                raise ValueError("Cannot access system directories")

        return validated_path

    @staticmethod
    def sanitize_for_filesystem_ops(validated_path: Path) -> Path:
        """
        Re-sanitize a validated path for static analyzer recognition.

        This method takes an already-validated Path and passes it through
        werkzeug's safe_join to create a path that static analyzers like
        CodeQL recognize as sanitized.

        Note: This exists because CodeQL doesn't trace through custom validation
        functions. The path is already secure from validate_local_filesystem_path(),
        but safe_join makes that explicit to static analyzers.

        Args:
            validated_path: A Path that has already been validated by
                          validate_local_filesystem_path()

        Returns:
            A Path object safe for filesystem operations

        Raises:
            ValueError: If the path fails sanitization
        """
        if not validated_path.is_absolute():
            raise ValueError("Path must be absolute")

        # Use safe_join to create a sanitized path that static analyzers recognize
        # safe_join handles path traversal detection properly (not substring matching)
        path_str = str(validated_path)
        safe_path_str = safe_join("/", path_str.lstrip("/"))
        if safe_path_str is None:
            raise ValueError("Path failed security sanitization")

        return Path(safe_path_str)

    @staticmethod
    def validate_model_path(
        model_path: str, model_root: Optional[str] = None
    ) -> Path:
        """
        Validate a model file path specifically.

        Args:
            model_path: Path to the model file
            model_root: Root directory for models (defaults to ~/.local/share/llm_models)

        Returns:
            Validated Path object

        Raises:
            ValueError: If the path is invalid
        """
        if model_root is None:
            # Default model root if not provided
            model_root = str(Path.home() / ".local" / "share" / "llm_models")

        # Create model root if it doesn't exist
        model_root_path = Path(model_root).resolve()
        model_root_path.mkdir(parents=True, exist_ok=True)

        # Validate the path
        validated_path = PathValidator.validate_safe_path(
            model_path,
            model_root_path,
            allow_absolute=False,  # Models should always be relative to model root
            required_extensions=None,  # Models can have various extensions
        )

        if not validated_path:
            raise ValueError("Invalid model path")

        # Check if the file exists
        if not validated_path.exists():
            raise ValueError(f"Model file not found: {validated_path}")

        if not validated_path.is_file():
            raise ValueError(f"Model path is not a file: {validated_path}")

        return validated_path

    @staticmethod
    def validate_data_path(file_path: str, data_root: str) -> Path:
        """
        Validate a path within the data directory.

        Args:
            file_path: Path relative to data root
            data_root: The data root directory

        Returns:
            Validated Path object

        Raises:
            ValueError: If the path is invalid
        """
        validated_path = PathValidator.validate_safe_path(
            file_path,
            data_root,
            allow_absolute=False,  # Data paths should be relative
            required_extensions=None,
        )

        if not validated_path:
            raise ValueError("Invalid data path")

        return validated_path

    @staticmethod
    def validate_config_path(
        config_path: str, config_root: Optional[str] = None
    ) -> Path:
        """
        Validate a configuration file path.

        Args:
            config_path: Path to config file
            config_root: Root directory for configs (optional for absolute paths)

        Returns:
            Validated Path object

        Raises:
            ValueError: If the path is invalid
        """
        # Sanitize input first - remove any null bytes and normalize
        if not config_path or not isinstance(config_path, str):
            raise ValueError("Invalid config path input")

        # Remove null bytes and normalize
        config_path = config_path.replace("\x00", "").strip()

        # Check for path traversal attempts in the string itself
        # Define restricted system directories that should never be accessed
        RESTRICTED_PREFIXES = ("etc", "proc", "sys", "dev")

        if ".." in config_path:
            raise ValueError("Invalid path - potential traversal attempt")

        # Check if path starts with any restricted system directory
        normalized_path = config_path.lstrip("/").lower()
        for restricted in RESTRICTED_PREFIXES:
            if (
                normalized_path.startswith(restricted + "/")
                or normalized_path == restricted
            ):
                raise ValueError(
                    f"Invalid path - restricted system directory: {restricted}"
                )

        # For config files, we might allow absolute paths with restrictions
        # Check if path starts with / or drive letter (Windows) to detect absolute paths
        # This avoids using Path() or os.path on user input
        is_absolute = (
            config_path.startswith("/")  # Unix absolute
            or (
                len(config_path) > 2 and config_path[1] == ":"
            )  # Windows absolute
        )

        if is_absolute:
            # For absolute paths, use safe_join with root directory
            # This validates the path without using Path() directly on user input
            # Use safe_join to validate the absolute path
            safe_path = safe_join("/", config_path)
            if safe_path is None:
                raise ValueError("Invalid absolute path")

            # Now it's safe to create Path object from validated string
            path_obj = Path(safe_path)

            # Additional validation for config files
            if path_obj.suffix not in PathValidator.CONFIG_EXTENSIONS:
                raise ValueError(f"Invalid config file type: {path_obj.suffix}")

            # Check existence using validated path
            if not path_obj.exists():
                raise ValueError(f"Config file not found: {path_obj}")

            return path_obj
        else:
            # For relative paths, use the config root
            if config_root is None:
                from ..config.paths import get_data_directory

                config_root = get_data_directory()

            return PathValidator.validate_safe_path(
                config_path,
                config_root,
                allow_absolute=False,
                required_extensions=PathValidator.CONFIG_EXTENSIONS,
            )
